\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{url}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 插入代码
\usepackage{listings}
\usepackage{color}

% 设置列表的样式
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},   
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,         
    breaklines=true,                 
    captionpos=b,                    
    keepspaces=true,                 
    numbers=left,                    
    numbersep=5pt,                  
    showspaces=false,                
    showstringspaces=false,
    showtabs=false,                  
    tabsize=2
}

\lstset{style=mystyle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{R语言统计入门第8章：表格数据}
\author{PD ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{目录}

\begin{enumerate}
\item[8.1.] 单比例检验（哮喘病人）
\item[8.2.] 两个独立的比例的检验（Lewitt-Machin）
\item[8.3.] 多个比例的检验，趋势检验（产妇与鞋码）
\item[8.4.] 表格数据的行与列的独立性的检验（婚姻与咖啡）

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.1. 患哮喘病人比例的检验 }

\begin{itemize}

\item  {\color{red}问题：随机观测215名病人，发现39名患有哮喘。设随机一个病人患哮喘的概率是 $p$. 检验假设 $p=p_0=0.15$. }

\vspace{0.3cm}

\item 解答思路： 
\begin{enumerate}
\item  设 $X$ 是 $N=215$ 个病人中患有哮喘的病人数目。
\item  随机变量 $X$ 服从二项分布 $b(N,p)$. 给定显著性水平。
\item  计算这个分布的拒绝域。
\item  看 $x=39$ 是否落在拒绝域。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.2.  }

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.9\textwidth, keepaspectratio]{asthma-binom-test.png}
\caption{哮喘病人人数的概率分布}
%\caption{Distribution, number of patients with asthma  }
\end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.1.3. 患哮喘病人比例的检验（图像的代码） }

\begin{itemize}
\item  {\color{red}问题：画出上一页的图像的代码是什么？}

\item 解答：使用 \,{\color{blue}\verb+points()+} 画点，使用 \,{\color{blue}\verb+lines()+} 画线段。
这两个函数的第一个参数是所有点的横坐标组成的向量，第二个参数是所有点的纵坐标组成的向量。

{\color{blue}
\begin{verbatim}
> x<-0:215
> y<-dbinom(x,size=215,prob=0.15)
> plot(x,y,type='h')
> points(39,0,cex=2,col='red',pch=16)
> lines(c(39,39),c(0,y[x==39]),col='red',lwd=3)
> for (k in 40:215)
+ lines(c(k,k),c(0,y[x==k]),col='blue',lwd=2)
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.4. 患哮喘病人比例的检验（正态分布近似） }

\begin{itemize}
\item  {\color{red}问题：完成上一页的假设检验问题。}

\item 解答：先写出统计量和计算统计值，再计算 $p$ 值和得出结论。

\begin{enumerate}

\item 使用正态分布近似，构造统计量 $U$ 或者等价的 $U^{\,2}$,
\[ U = \frac{X-Np_0}{\sqrt{Np_0(1-p_0)}},\,\,\, U^{\,2} = \frac{(X-Np_0)^2}{Np_0(1-p_0)}, \]
在 $H_0:p=p_0$ 为真时， 分别近似服从标准正态分布与卡方分布$\chi^2(1)$. 

\item  计算统计值与 $p$ 值，看到 $p$ 值较大，因此不能拒绝 $p=0.15$ 的假设。
\[ u^{\,2}=\frac{(39-215\times 0.15)^2}{215\times 0.15\times 0.85} =1.6621, \,\,\,
p=\mathbb{P}(U^{\,2}>u^{\,2}) =  0.1973. \]

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.1.5. 患哮喘病人比例的检验（使用R程序） }

\begin{itemize}
\item  {\color{red}问题：使用R程序完成上一页的假设检验问题。}

\item 解答一：使用卡方统计量的单比例检验，
{\color{blue}
\begin{verbatim}
> prop.test(39,215,0.15)
\end{verbatim}
}

\item 解答二：使用二项分布的精确检验，
{\color{blue}
\begin{verbatim}
> binom.test(39,215,0.15)
\end{verbatim}
}

\item 得到$p$值分别是 0.2326 和 0.2135. 因此不能拒绝原假设。

\vspace{0.3cm}

\item {\color{red}问题：看来 prop.test 的连续性修正将 $p$ 值从 0.1973 变成了 0.2326. 这个具体是怎么做的？ }

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.1.6. 自由度为1的卡方分布的密度函数}

\begin{itemize}
\item  {\color{red}问题：画出卡方分布的密度函数的图像，解释使用卡方统计量的原理。}

\item 解答：使用 \,{\color{blue}\verb+dchisq()+} 函数来计算密度函数的值。

{\color{blue}
\begin{verbatim}
> x<-seq(0,4,0.05)
> y<-dchisq(x,df=1)
> plot(x,y,type='l')
> abline(v=0)
> abline(h=0)
> polygon(c(x[x>=1.65],x[81],1.65),c(y[x>=1.65],0,0), 
		col='red')
\end{verbatim}
}

\item 注释：\,{\color{blue}\verb+seq()+} 得到横坐标的等差数列，\,{\color{blue}\verb+abline()+} 画直线，\,{\color{blue}\verb+polygon()+} 用颜色填充一个多边形区域。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.7. 自由度为1的卡方分布的密度函数的图像}

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.8\textwidth, keepaspectratio]{asthma-chisq-p-value.png}
\caption{p-value of chi-square with df=1 }
\end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.8. 使用精确的二项分布的假设检验（习题8.5.） }

\begin{itemize}

\item  {\color{red}问题：设显著性水平 $\alpha=0.05$. 使用双侧检验，检验假设
\[ H_0: p=p_0, \,\,\,\textrm{v.s.}\,\,\,\, H_1: p\neq p_0, \]
设现在15次试验中有3次成功。}

\begin{enumerate}
\item  {\color{red}对 $p_0$ 从0到1以0.001为间隔进行变化，计算所有这些假设检验的 $p$值的变化。提示：二项分布 $X\sim b(N,p_0)$ 的概率函数为
\[ \mathbb{P}(X=k) = \binom{N}{k} p_0^k (1-p_0)^{N-k}, \,\,\, k=0,1,2,\cdots,N. \] }

\item {\color{red}对每个 $p_0$ 计算双侧置信区间。观察 $p_0$ 的微小变化是否导致检验结果的很大变化？}
\item {\color{red}定义双侧置信区间的困难在哪里？}
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.1.9. 手术后并发症的概率（习题8.1.） }

\begin{itemize}
\item {\color{red}问题：对某种疾病，现有的手术方法，术后并发症的发生频率为 20\%. 某医生使用新的手术方法，10个病人都没有出现并发症。}
\begin{enumerate}
\item[(1)]  {\color{red}使用二项分布进行假设检验。}
\item[(2)]  {\color{red}为说明新的手术方法的效果更好，需要连续多少病人都没有出现并发症？}
\end{enumerate}

%Reconsider the situation of Exercise 3.3, where 10 consecutive patients had operations without complications (并发症) and the expected rate was 20\%. Calculate the relevant one-sided test in the binomial distribution. How large a sample (still with zero complications) would be necessary to obtain statistical significance?

\vspace{0.3cm}

\item 解答：(1) 这里是单侧检验。解答步骤如下：

\begin{enumerate}
\item 写出原假设和备选假设。
\item 写出统计量和拒绝域。设显著性水平 $\alpha=0.05$. 
\item 计算样本容量，使得`零并发症'这件事成为小概率事件。即统计值落入拒绝域。
\end{enumerate}

\item 解答：(2) 计算样本容量，使得该事件发生的概率低于显著性水平。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.2.1. 双比例检验 Two independent proportions}

\begin{itemize}
\item  {\color{red}问题：设有二项分布的随机变量 $X_1\sim b(N_1,p_1)$ 和 $X_2\sim b(N_2,p_2)$. 
如何检验假设 $H_0: p_1=p_2$ ?}

%这两个比例 $X_1/N_1$ 与 $X_2/N_2$ 相等？
\item 解答：

\begin{enumerate}

\item 考虑下述统计量 $d$ 与 $U$, 计算相应的置信区间和 $p$ 值。
\[ d=\frac{X_1}{N_1} - \frac{X_2}{N_2}, \,\,\, U=\frac{d}{\sqrt{\text{Var}(d)}}. \]

\item 在 $H_0: p_1=p_2=p$ 为真时，统计量 $d$ 近似服从正态分布，且
\[\mathbb{E}(d)=0, \,\, \text{Var}(d) = \left(\frac{1}{N_1}+\frac{1}{N_2}\right) p(1-p),\]
其中共同的成功概率 $p$ 用合并估计 $\hat{p}=(X_1+X_2)/(N_1+N_2)$ 代替。

\item 也可以考虑统计量 $U{\,}^2$, 在原假设为真时，$U{\,}^2\, \dot\sim \,\chi^2(1)$. 

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.2.2. Lewitt-Machin 的例子（卡方检验）}

\begin{itemize}

\item  {\color{red}问题：设12次试验A里有9次成功，13次试验B里有4次成功。请问试验A和试验B的成功率的差是否显著不等于零？}%求这个差的置信区间。

\item 解答：使用 \,{\color{blue}\verb+prop.test()+} 函数。通过连续性修正，得到 $\chi^2 = 3.2793$, $p=0.07$. 与下一页按定义计算的结果不一样，这其中的细节是什么？

{\color{blue}
\begin{verbatim}
> lewitt.success<-c(9,4)
> lewitt.total<-c(12,13)
> prop.test(lewitt.success,lewitt.total)
data:  lewitt.success out of lewitt.total
X-squared = 3.2793, df = 1, p-value = 0.07016
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.2.3. }

\begin{itemize}

\item 按定义计算卡方统计量，得到 $\chi^2=4.8909$, $p=0.027$. 
{\small\color{blue}
\begin{verbatim}
> d<- 9/12 - 4/13
> vard<- (1/12+1/13)*(13/25)*(1-13/25)
> usq<- d^2/vard
> p<- 1-pchisq(usq,1)
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.2.4. Lewitt-Machin 的例子（Fisher 精确检验：原理） }

\begin{itemize}
\item  {\color{red}问题：已知下述表格，使用 Fisher 精确检验方法，检验假设}
\[ {\color{red} H_0: p_1=p_2, \,\,\, \textrm{v.s.} \,\,\, H_1:p_1\neq p_2.  } \]

\begin{table}[ht]
\begin{tabular}{|c|c|c|c|c|c|} \hline
           & 成功次数 & 失败次数 & 总次数 & 成功率参数 & 参数估计  \\ \hline
试验A  & {\bf\color{blue} 9} 		& 3 		  & {\color{red}12}  & $p_1$  &  9/12  \\ \hline
试验B & 4 		& 9 		  & {\color{red}13}  & $p_2$  & 4/13  \\ \hline
 总次数   & {\color{red} 13}            & {\color{red}12}	  & {\color{red}25}  & $p$ & 13/25  \\ \hline     
\end{tabular}
\end{table}

\item 解答：在给定上述红色标注的边际值时，左上角 $2\times 2$ 表格的分布情况可以由组合学方法精确计算。
已知其中一个数字，可以确定其余三个数字。{\color{red}疑问：第一个数字是 9 的概率是多少？}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.2.5. Lewitt-Machin 的例子（Fisher 精确检验：代码）}

\begin{itemize}
\item 注释：置信区间是优势比 $\frac{p_1/(1-p_1)}{p_2/(1-p_2)}$ 的。{\color{red}疑问：检验的 $p$ 值小于0.05, 但是优势比的 95\% 的置信区间包含1，似乎有矛盾。这是什么原因？}

{\small\color{blue}
\begin{verbatim}
> A<-matrix(c(9,4,3,9),2)
> fisher.test(A)
	Fisher's Exact Test for Count Data
data:  A
p-value = 0.04718
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.9006803 57.2549701
sample estimates:
odds ratio 
  6.180528 
 \end{verbatim}
}
 
 
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.2.6. Lewitt-Machin 的例子（Pearson 卡方检验） }

\begin{itemize}
\item {\color{red}问题：使用皮尔逊卡方统计量，检验LM例子中比例相等的假设。}

\item 解答：使用 \,{\color{blue}\verb+chisq.test()+} 函数，输入参数与 \,{\color{blue}\verb+fisher.test()+} 一样。

{\color{blue}
\begin{verbatim}
> chisq.test(A)
Pearson's Chi-squared test with Yates' continuity 
        correction
        
data:  A
X-squared = 3.2793, df = 1, p-value = 0.07016
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.2.7.  }

\begin{itemize}

\item {\color{red}问题：使用 \verb+prop.test()+, 检验LM例子中比例相等的假设。}

{\color{blue}
\begin{verbatim}
> prop.test(c(9,4),c(12,13))
2-sample test for equality of proportions with continuity 
        correction
        
data:  c(9, 4) out of c(12, 13)
X-squared = 3.2793, df = 1, p-value = 0.07016
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{8.3.1. R语言查看变量的类型和结构}

\begin{itemize}
\item  {\color{red}问题：如何查看一个变量的数据类型和结构？}

\item 解答：可以用以下这些函数。

\begin{itemize}
\item class(): Return the class of an object.
\item mode(): Get or set the type or storage mode of an object.
\item typeof(): Determines the type or storage mode of an object.
\item str(): Compactly display the structure.
\item dim(): Retrieve or set the dimension of an object.
\item length(): Get or set the length of vectors, including lists, and factors.
\item nrow() and ncol(): return the number of rows or columns present in an array.
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.3.2. 产妇是否剖腹产与鞋码大小的关系（数据）}

\begin{itemize}
\item {\color{red}问题：载入数据 \verb+caesar.shoe+, 解释这个数据的类型。}

\item 解答：这是一个带名称的 $2\times 6$ 数值型矩阵。行名称是是否剖腹产，列名称是鞋码大小。矩阵数据是每个类别的产妇人数。
{\color{blue}
\begin{verbatim}
> library(ISwR)
> caesar.shoe
    <4  4 4.5  5 5.5  6+
Yes  5  7   6  7   8  10
No  17 28  36 41  46 140
> class(caesar.shoe)
[1] "matrix"
> typeof(caesar.shoe)
[1] "double"
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.3.3. 产妇是否剖腹产与鞋码大小的关系（多比例检验）}

\begin{itemize}

\item {\color{red}问题：将产妇按鞋码大小分成了六组，考虑每组产妇里，剖腹产的人数占小组总人数的比例，检验这六个比例是否相等。}

\item 解答：使用 \,{\color{blue}\verb+prop.test()+} 函数，两个参数分别为剖腹产人数与小组总人数，这两个参数都是有六个数值的向量。结果：不能拒绝相等的假设。

{\color{blue}
\begin{verbatim}
> caesar.shoe.yes<-caesar.shoe[1,]
> caesar.shoe.total<-caesar.shoe[1,]+caesar.shoe[2,]
> prop.test(caesar.shoe.yes,caesar.shoe.total)
      6-sample test for equality of proportions 
      without continuity correction
data:  caesar.shoe.yes out of caesar.shoe.total
X-squared = 9.2874, df = 5, p-value = 0.09814
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.3.4. 产妇是否剖腹产与鞋码大小的关系（简练的代码） }

\begin{itemize}
\item {\color{red}问题：把代码简化一些。}
%\item 解答：
{\color{blue}
\begin{verbatim}
> x<-c(5,7,6,7,8,10)
> y<-c(22,35,42,48,54,150)
> prop.test(x,y)
      6-sample test for equality of proportions 
      without continuity correction
data:  x out of y
X-squared = 9.3, df = 5, p-value = 0.1
alternative hypothesis: two.sided
sample estimates:
 prop 1  prop 2  prop 3  prop 4  prop 5  prop 6 
0.22727 0.20000 0.14286 0.14583 0.14815 0.06667 
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.4.1. $r\times c$ 表格}

\begin{itemize}
\item {\color{red}问题：对于行与列都多于两个类的表格数据，如何检验假设``行与列之间没有关系''？}

\begin{table}[ht]
\begin{tabular}{|cccc|c|} \hline
$n_{11}$ & $n_{12}$ & $\cdots$ & $n_{1c}$ & $n_{1\cdot}$ \\  
$n_{21}$ & $n_{22}$ & $\cdots$ & $n_{2c}$ & $n_{2\cdot}$ \\  
$\vdots$ & $\vdots$ &   & $\vdots$ & $\vdots$ \\  
$n_{r1}$ & $n_{r2}$ & $\cdots$ & $n_{rc}$ & $n_{r\cdot}$ \\ \hline 
$n_{\cdot 1}$ & $n_{\cdot 2}$ & $\cdots$ & $n_{\cdot c}$ & $n_{\cdot\cdot}$ \\ \hline 
\end{tabular}
\end{table}

\item 解答：使用 \,{\color{blue}\verb+chisq.test()+} 和 \,{\color{blue}\verb+fisher.test()+} 进行检验。 统计量为
\[ \chi^2 = \sum\limits_{i,j} \frac{(O_{ij}-E_{ij})^2}{E_{ij}}, \,\, \text{ 其中 } O_{ij}=n_{ij}, \,\, E_{ij}=\frac{n_{i\cdot}\times n_{\cdot j}}{n_{\cdot\cdot}}. \]

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.4.2. 婚姻状况与咖啡因消费的关系（数据）}

\begin{itemize}
\item {\color{red}问题：录入数据，生成一个带名称的数值型矩阵。}

\item 解答：使用 \,{\color{blue}\verb+colnames()+} 和 \,{\color{blue}\verb+rownames()+} 来指定矩阵的行列名称。

{\small\color{blue}
\begin{verbatim}
> caff.marital <- matrix(c(652,1537,598,242,36,46,38,21,
+      218,327,106,67),nrow=3,byrow=T)
> caff.marital
> colnames(caff.marital)<-c('0','1-150','151-300','>300')
> rownames(caff.marital)<-c('married','prev.married','single')
> caff.marital
               0 1-150 151-300 >300
married      652  1537     598  242
prev.married  36    46      38   21
single       218   327     106   67
 \end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.4.3. 婚姻状况与咖啡因消费的关系（假设检验）}

\begin{itemize}

\item {\color{red}问题：检验``婚姻状况与咖啡因消费之间相互独立''的假设。}

\item 解答：使用 \,{\color{blue}\verb+chisq.test()+}, 结论是拒绝这个假设。

{\color{blue}
\begin{verbatim}
> chisq.test(caff.marital)
	Pearson's Chi-squared test
data:  caff.marital
X-squared = 50, df = 6, p-value = 2e-09
 \end{verbatim}
}

\item 按定义计算这个卡方统计量，验证上述结果中的统计值。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{8.4.4. 婚姻状况与咖啡因消费的关系（找出哪里不独立） }

\begin{itemize}

\item {\color{red}问题：找出不独立的具体位置。}

\item 解答：
比较观测数据矩阵，与独立性假设下的期望数据矩阵。读出使得卡方统计值很大的是哪些小组里哪些单元的数据。


{\small\color{blue}
\begin{verbatim}
> E<-chisq.test(caff.marital)$expected
> O<-chisq.test(caff.marital)$observed
> (E-O)^2/E
                  0 1-150 151-300  >300
married       4.106  1.61   0.687 0.886
prev.married  0.301  7.82   4.571 6.817
single       15.356  1.88   7.025 0.602
 \end{verbatim}
}


\item  {\color{red}疑问：如何从数据的统计分析给出这两个因素不独立的细节情况？}

\end{itemize}

\end{frame}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}
